This notebook uses cython, which requires a C compiler. Linux comes with a compiler. Install xcode for OSX and Visual Studio for windows.


In [1]:
%load_ext cython

In [2]:
import array
a = array.array('l',range(100))
s = 0

Sum up an array of numbers using python


In [3]:
def python_sum(a):
    global s
    s = 0
    for i in range(len(a)):
        for j in range(10000):
            s = s + a[i]
    return s

In [4]:
%timeit python_sum(a)


1 loop, best of 3: 142 ms per loop

Use cython, without changing the code


In [5]:
%%cython --annotate
def cython_sum1(a):
    global s
    s = 0
    for i in range(len(a)):
        for j in range(10000):
            s = s + a[i]
    return s


Out[5]:
Cython: _cython_magic_68d17ec69c8149145d5b978ab2434b12.pyx

Generated by Cython 0.23.4

Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.

+1: def cython_sum1(a):
/* Python wrapper */
static PyObject *__pyx_pw_46_cython_magic_68d17ec69c8149145d5b978ab2434b12_1cython_sum1(PyObject *__pyx_self, PyObject *__pyx_v_a); /*proto*/
static PyMethodDef __pyx_mdef_46_cython_magic_68d17ec69c8149145d5b978ab2434b12_1cython_sum1 = {"cython_sum1", (PyCFunction)__pyx_pw_46_cython_magic_68d17ec69c8149145d5b978ab2434b12_1cython_sum1, METH_O, 0};
static PyObject *__pyx_pw_46_cython_magic_68d17ec69c8149145d5b978ab2434b12_1cython_sum1(PyObject *__pyx_self, PyObject *__pyx_v_a) {
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("cython_sum1 (wrapper)", 0);
  __pyx_r = __pyx_pf_46_cython_magic_68d17ec69c8149145d5b978ab2434b12_cython_sum1(__pyx_self, ((PyObject *)__pyx_v_a));

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_46_cython_magic_68d17ec69c8149145d5b978ab2434b12_cython_sum1(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_a) {
  Py_ssize_t __pyx_v_i;
  CYTHON_UNUSED long __pyx_v_j;
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("cython_sum1", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __Pyx_XDECREF(__pyx_t_6);
  __Pyx_AddTraceback("_cython_magic_68d17ec69c8149145d5b978ab2434b12.cython_sum1", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* … */
  __pyx_tuple_ = PyTuple_Pack(3, __pyx_n_s_a, __pyx_n_s_i, __pyx_n_s_j); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_tuple_);
  __Pyx_GIVEREF(__pyx_tuple_);
/* … */
  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_68d17ec69c8149145d5b978ab2434b12_1cython_sum1, NULL, __pyx_n_s_cython_magic_68d17ec69c8149145d); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_cython_sum1, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
 2:     global s
+3:     s = 0
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_s, __pyx_int_0) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+4:     for i in range(len(a)):
  __pyx_t_1 = PyObject_Length(__pyx_v_a); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
    __pyx_v_i = __pyx_t_2;
+5:         for j in range(10000):
    for (__pyx_t_3 = 0; __pyx_t_3 < 0x2710; __pyx_t_3+=1) {
      __pyx_v_j = __pyx_t_3;
+6:             s = s + a[i]
      __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_s); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
      __Pyx_GOTREF(__pyx_t_4);
      __pyx_t_5 = __Pyx_GetItemInt(__pyx_v_a, __pyx_v_i, Py_ssize_t, 1, PyInt_FromSsize_t, 0, 1, 1); if (unlikely(__pyx_t_5 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
      __Pyx_GOTREF(__pyx_t_5);
      __pyx_t_6 = PyNumber_Add(__pyx_t_4, __pyx_t_5); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
      __Pyx_GOTREF(__pyx_t_6);
      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
      if (PyDict_SetItem(__pyx_d, __pyx_n_s_s, __pyx_t_6) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
      __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
    }
  }
+7:     return s
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_s); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_6);
  __pyx_r = __pyx_t_6;
  __pyx_t_6 = 0;
  goto __pyx_L0;

In [6]:
print('python sum: ',python_sum(a))
print('cython sum1: ',cython_sum1(a))
print('python sum')
%timeit python_sum(a)
print('cython sum1')
%timeit cython_sum1(a)


python sum:  49500000
cython sum1:  49500000
python sum
1 loop, best of 3: 134 ms per loop
cython sum1
10 loops, best of 3: 63.2 ms per loop

Does making s a local variable help?


In [7]:
%%cython --annotate
def cython_sum2(a):
    s = 0
    for i in range(len(a)):
        for j in range(10000):
            s = s + a[i]
    return s


Out[7]:
Cython: _cython_magic_aee3ae35bc58738c5d75dbcb8db7d665.pyx

Generated by Cython 0.23.4

Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.

+1: def cython_sum2(a):
/* Python wrapper */
static PyObject *__pyx_pw_46_cython_magic_aee3ae35bc58738c5d75dbcb8db7d665_1cython_sum2(PyObject *__pyx_self, PyObject *__pyx_v_a); /*proto*/
static PyMethodDef __pyx_mdef_46_cython_magic_aee3ae35bc58738c5d75dbcb8db7d665_1cython_sum2 = {"cython_sum2", (PyCFunction)__pyx_pw_46_cython_magic_aee3ae35bc58738c5d75dbcb8db7d665_1cython_sum2, METH_O, 0};
static PyObject *__pyx_pw_46_cython_magic_aee3ae35bc58738c5d75dbcb8db7d665_1cython_sum2(PyObject *__pyx_self, PyObject *__pyx_v_a) {
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("cython_sum2 (wrapper)", 0);
  __pyx_r = __pyx_pf_46_cython_magic_aee3ae35bc58738c5d75dbcb8db7d665_cython_sum2(__pyx_self, ((PyObject *)__pyx_v_a));

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_46_cython_magic_aee3ae35bc58738c5d75dbcb8db7d665_cython_sum2(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_a) {
  PyObject *__pyx_v_s = NULL;
  Py_ssize_t __pyx_v_i;
  CYTHON_UNUSED long __pyx_v_j;
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("cython_sum2", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __Pyx_AddTraceback("_cython_magic_aee3ae35bc58738c5d75dbcb8db7d665.cython_sum2", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_s);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* … */
  __pyx_tuple_ = PyTuple_Pack(4, __pyx_n_s_a, __pyx_n_s_s, __pyx_n_s_i, __pyx_n_s_j); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_tuple_);
  __Pyx_GIVEREF(__pyx_tuple_);
/* … */
  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_aee3ae35bc58738c5d75dbcb8db7d665_1cython_sum2, NULL, __pyx_n_s_cython_magic_aee3ae35bc58738c5d); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_cython_sum2, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+2:     s = 0
  __Pyx_INCREF(__pyx_int_0);
  __pyx_v_s = __pyx_int_0;
+3:     for i in range(len(a)):
  __pyx_t_1 = PyObject_Length(__pyx_v_a); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
    __pyx_v_i = __pyx_t_2;
+4:         for j in range(10000):
    for (__pyx_t_3 = 0; __pyx_t_3 < 0x2710; __pyx_t_3+=1) {
      __pyx_v_j = __pyx_t_3;
+5:             s = s + a[i]
      __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_a, __pyx_v_i, Py_ssize_t, 1, PyInt_FromSsize_t, 0, 1, 1); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 5; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
      __Pyx_GOTREF(__pyx_t_4);
      __pyx_t_5 = PyNumber_Add(__pyx_v_s, __pyx_t_4); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 5; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
      __Pyx_GOTREF(__pyx_t_5);
      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
      __Pyx_DECREF_SET(__pyx_v_s, __pyx_t_5);
      __pyx_t_5 = 0;
    }
  }
+6:     return s
  __Pyx_XDECREF(__pyx_r);
  __Pyx_INCREF(__pyx_v_s);
  __pyx_r = __pyx_v_s;
  goto __pyx_L0;

In [8]:
print('python sum: ',python_sum(a))
print('cython sum1: ',cython_sum1(a))
print('cython sum2: ',cython_sum2(a))
print('python sum')
%timeit python_sum(a)
print('cython sum1')
%timeit cython_sum1(a)
print('cython sum2')
%timeit cython_sum2(a)


python sum:  49500000
cython sum1:  49500000
cython sum2:  49500000
python sum
1 loop, best of 3: 393 ms per loop
cython sum1
10 loops, best of 3: 56.7 ms per loop
cython sum2
10 loops, best of 3: 39.5 ms per loop

In [9]:
%%cython --annotate
from cpython cimport array

def cython_sum3(a):
    cdef long s = 0
    cdef array.array ta = a
    cdef long * ap = ta.data.as_longs
    for i in range(len(ta)):
        for j in range(10000):
            s = s + ap[i]
    return s


Out[9]:
Cython: _cython_magic_e2a58310d0a5b85a70965375c098a182.pyx

Generated by Cython 0.23.4

Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.

+01: from cpython cimport array
  __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
 02: 
+03: def cython_sum3(a):
/* Python wrapper */
static PyObject *__pyx_pw_46_cython_magic_e2a58310d0a5b85a70965375c098a182_1cython_sum3(PyObject *__pyx_self, PyObject *__pyx_v_a); /*proto*/
static PyMethodDef __pyx_mdef_46_cython_magic_e2a58310d0a5b85a70965375c098a182_1cython_sum3 = {"cython_sum3", (PyCFunction)__pyx_pw_46_cython_magic_e2a58310d0a5b85a70965375c098a182_1cython_sum3, METH_O, 0};
static PyObject *__pyx_pw_46_cython_magic_e2a58310d0a5b85a70965375c098a182_1cython_sum3(PyObject *__pyx_self, PyObject *__pyx_v_a) {
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("cython_sum3 (wrapper)", 0);
  __pyx_r = __pyx_pf_46_cython_magic_e2a58310d0a5b85a70965375c098a182_cython_sum3(__pyx_self, ((PyObject *)__pyx_v_a));

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_46_cython_magic_e2a58310d0a5b85a70965375c098a182_cython_sum3(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_a) {
  long __pyx_v_s;
  arrayobject *__pyx_v_ta = 0;
  long *__pyx_v_ap;
  Py_ssize_t __pyx_v_i;
  CYTHON_UNUSED long __pyx_v_j;
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("cython_sum3", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_AddTraceback("_cython_magic_e2a58310d0a5b85a70965375c098a182.cython_sum3", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF((PyObject *)__pyx_v_ta);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* … */
  __pyx_tuple_ = PyTuple_Pack(6, __pyx_n_s_a, __pyx_n_s_s, __pyx_n_s_ta, __pyx_n_s_ap, __pyx_n_s_i, __pyx_n_s_j); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_tuple_);
  __Pyx_GIVEREF(__pyx_tuple_);
/* … */
  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_e2a58310d0a5b85a70965375c098a182_1cython_sum3, NULL, __pyx_n_s_cython_magic_e2a58310d0a5b85a70); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_cython_sum3, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+04:     cdef long s = 0
  __pyx_v_s = 0;
+05:     cdef array.array ta = a
  if (!(likely(((__pyx_v_a) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_a, __pyx_ptype_7cpython_5array_array))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 5; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_1 = __pyx_v_a;
  __Pyx_INCREF(__pyx_t_1);
  __pyx_v_ta = ((arrayobject *)__pyx_t_1);
  __pyx_t_1 = 0;
+06:     cdef long * ap = ta.data.as_longs
  __pyx_t_2 = __pyx_v_ta->data.as_longs;
  __pyx_v_ap = __pyx_t_2;
+07:     for i in range(len(ta)):
  if (unlikely(((PyObject *)__pyx_v_ta) == Py_None)) {
    PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  }
  __pyx_t_3 = Py_SIZE(((PyObject *)__pyx_v_ta)); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) {
    __pyx_v_i = __pyx_t_4;
+08:         for j in range(10000):
    for (__pyx_t_5 = 0; __pyx_t_5 < 0x2710; __pyx_t_5+=1) {
      __pyx_v_j = __pyx_t_5;
+09:             s = s + ap[i]
      __pyx_v_s = (__pyx_v_s + (__pyx_v_ap[__pyx_v_i]));
    }
  }
+10:     return s
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_s); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_r = __pyx_t_1;
  __pyx_t_1 = 0;
  goto __pyx_L0;

In [10]:
print('python sum: ',python_sum(a))
print('cython sum1: ',cython_sum1(a))
print('cython sum2: ',cython_sum2(a))
print('cython sum3: ',cython_sum3(a))
print('python sum')
%timeit python_sum(a)
print('cython sum1')
%timeit cython_sum1(a)
print('cython sum2')
%timeit cython_sum2(a)
print('cython sum3')
%timeit cython_sum3(a)


python sum:  49500000
cython sum1:  49500000
cython sum2:  49500000
cython sum3:  49500000
python sum
1 loop, best of 3: 415 ms per loop
cython sum1
10 loops, best of 3: 58.8 ms per loop
cython sum2
10 loops, best of 3: 50.8 ms per loop
cython sum3
The slowest run took 31.76 times longer than the fastest. This could mean that an intermediate result is being cached.
10000000 loops, best of 3: 193 ns per loop

In [11]:
from numba import jit
@jit
def numba_sum(a):
    s = 0
    for i in range(len(a)):
        for j in range(10000):
            s = s + a[i]
    return s

In [13]:
print('python sum: ',python_sum(a))
print('cython sum1: ',cython_sum1(a))
print('cython sum2: ',cython_sum2(a))
print('cython sum3: ',cython_sum3(a))
print('numba sum: ', numba_sum(a))
print('python sum')
%timeit python_sum(a)
print('cython sum1')
%timeit cython_sum1(a)
print('cython sum2')
%timeit cython_sum2(a)
print('cython sum3')
%timeit cython_sum3(a)
print('numba sum')
%timeit numba_sum(a)


python sum:  49500000
cython sum1:  49500000
cython sum2:  49500000
cython sum3:  49500000
numba sum:  49500000
python sum
1 loop, best of 3: 165 ms per loop
cython sum1
10 loops, best of 3: 69 ms per loop
cython sum2
10 loops, best of 3: 36.5 ms per loop
cython sum3
The slowest run took 27.89 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 180 ns per loop
numba sum
The slowest run took 20.09 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 765 ns per loop